# One-way ANOVA walkthrough on adult heights from the NHANES data:
#   1. one factor with two levels (Gender): t-test vs. regression vs. ANOVA;
#   2. one factor with several levels (Race1);
#   3. how different identifiability constraints (contrasts) change the
#      reported coefficients without changing the fitted group means.
#
# Results are shown by top-level auto-printing, so run this interactively,
# with Rscript, or via source(..., echo = TRUE).

library(NHANES)
library(stats)
library(ggplot2)

# One factor, two levels ----

nh_adult <- subset(NHANES, Age >= 18)
x <- nh_adult$Gender
y <- nh_adult$Height

# Group sizes and per-group summaries of the response.
tapply(y, x, length)
tapply(y, x, summary)

ggplot(nh_adult, aes(x = Gender, y = Height)) +
  geom_boxplot()

# One-tailed, pooled-variance two-sample t-test.
# alternative = "less": H1 is that the mean of the first level of x is
# smaller than the mean of the second level.
t.test(y ~ x, alternative = "less", var.equal = TRUE)

summary(lm(y ~ x)) # test for beta = 0 in regression
anova(lm(y ~ x))   # ANOVA F-test

# This equals the absolute value of the t-statistic.
# The column is addressed by its full name "F value" instead of relying on
# partial matching through `$F`.
sqrt(anova(lm(y ~ x))[["F value"]][1])

# One factor, multiple levels ----

x <- nh_adult$Race1

tapply(y, x, length)
tapply(y, x, summary)

ggplot(nh_adult, aes(x = Race1, y = Height)) +
  geom_boxplot()

summary(lm(y ~ x)) # test for beta = 0 in regression
anova(lm(y ~ x))   # ANOVA F-test

# Understanding the use of different constraints ----

# options(contrasts = ...) takes two contrast functions: the first is used
# for unordered factors, the second for ordered factors.
# "contr.treatment" is R's default for unordered factors and corresponds to
# setting the first alpha to zero. (R's default for ordered factors is
# "contr.poly"; it is overridden here as well.)
# NOTE: this changes a global option and the script does not restore it.
options(contrasts = c("contr.treatment", "contr.treatment"))

summary(lm(y ~ x))
summary(lm(y ~ x - 1)) # This corresponds to setting the overall mean to zero

# contr.sum constrains the alphas to sum to zero.
options(contrasts = c("contr.sum", "contr.sum"))

summary(lm(y ~ x))

# The summary does not report the last alpha, but it can be recovered from
# the sum-to-zero constraint. The indices 2:5 (and 5 below) assume that x
# has five levels.
A <- lm(y ~ x)
-sum(A$coefficients[2:5])

# Note that the following two are the same: both give the fitted mean of the
# last level of x.
A$coefficients[1] - sum(A$coefficients[2:5])
B <- lm(y ~ x - 1)
B$coefficients[5]